Explanation

Brief

Welcome!

This exploratory data analysis (EDA) uses the UK Road Safety: Traffic Accidents (2005-2017) dataset.

reference: https://www.kaggle.com/tsiaras/uk-road-safety-accidents-and-vehicles

Data’s Point of View

These files provide detailed road safety data about the circumstances of personal injury road accidents in GB from 1979, the types of vehicles involved and the consequential casualties. The statistics relate only to personal injury accidents on public roads that are reported to the police, and subsequently recorded, using the STATS19 accident reporting form.

Data Inspection

Library

library("lubridate")
library("dplyr")
library("ggpubr")
library("ggplot2")
library("scales")
library("RColorBrewer")
library("ggridges")
library("leaflet")

Data source:

# Load the accident records from the CSV file in the working directory.
traffic <- read.csv(file = "Accident_Information.csv")

First 6 rows

# Show the first six rows.
head(traffic, n = 6L)

Last 6 rows

# Show the last six rows.
tail(traffic, n = 6L)

Data dimension

# Number of rows, then number of columns.
c(nrow(traffic), ncol(traffic))
## [1] 2047256      34

Column Names

# List the column names of the data frame.
colnames(traffic)
##  [1] "Accident_Index"                             
##  [2] "X1st_Road_Class"                            
##  [3] "X1st_Road_Number"                           
##  [4] "X2nd_Road_Class"                            
##  [5] "X2nd_Road_Number"                           
##  [6] "Accident_Severity"                          
##  [7] "Carriageway_Hazards"                        
##  [8] "Date"                                       
##  [9] "Day_of_Week"                                
## [10] "Did_Police_Officer_Attend_Scene_of_Accident"
## [11] "Junction_Control"                           
## [12] "Junction_Detail"                            
## [13] "Latitude"                                   
## [14] "Light_Conditions"                           
## [15] "Local_Authority_.District."                 
## [16] "Local_Authority_.Highway."                  
## [17] "Location_Easting_OSGR"                      
## [18] "Location_Northing_OSGR"                     
## [19] "Longitude"                                  
## [20] "LSOA_of_Accident_Location"                  
## [21] "Number_of_Casualties"                       
## [22] "Number_of_Vehicles"                         
## [23] "Pedestrian_Crossing.Human_Control"          
## [24] "Pedestrian_Crossing.Physical_Facilities"    
## [25] "Police_Force"                               
## [26] "Road_Surface_Conditions"                    
## [27] "Road_Type"                                  
## [28] "Special_Conditions_at_Site"                 
## [29] "Speed_limit"                                
## [30] "Time"                                       
## [31] "Urban_or_Rural_Area"                        
## [32] "Weather_Conditions"                         
## [33] "Year"                                       
## [34] "InScotland"

Data Cleansing & Coercion

Check variable types

# Compact overview: one line per column with its type and first values.
dplyr::glimpse(traffic)
## Rows: 2,047,256
## Columns: 34
## $ Accident_Index                              <chr> "200501BS00001", "20050...
## $ X1st_Road_Class                             <chr> "A", "B", "C", "A", "Un...
## $ X1st_Road_Number                            <int> 3218, 450, 0, 3220, 0, ...
## $ X2nd_Road_Class                             <chr> NA, "C", NA, NA, NA, NA...
## $ X2nd_Road_Number                            <int> 0, 0, 0, 0, 0, 0, 0, 0,...
## $ Accident_Severity                           <chr> "Serious", "Slight", "S...
## $ Carriageway_Hazards                         <chr> "None", "None", "None",...
## $ Date                                        <chr> "2005-01-04", "2005-01-...
## $ Day_of_Week                                 <chr> "Tuesday", "Wednesday",...
## $ Did_Police_Officer_Attend_Scene_of_Accident <int> 1, 1, 1, 1, 1, 1, 1, 1,...
## $ Junction_Control                            <chr> "Data missing or out of...
## $ Junction_Detail                             <chr> "Not at junction or wit...
## $ Latitude                                    <dbl> 51.48910, 51.52007, 51....
## $ Light_Conditions                            <chr> "Daylight", "Darkness -...
## $ Local_Authority_.District.                  <chr> "Kensington and Chelsea...
## $ Local_Authority_.Highway.                   <chr> "Kensington and Chelsea...
## $ Location_Easting_OSGR                       <int> 525680, 524170, 524520,...
## $ Location_Northing_OSGR                      <int> 178240, 181650, 182240,...
## $ Longitude                                   <dbl> -0.191170, -0.211708, -...
## $ LSOA_of_Accident_Location                   <chr> "E01002849", "E01002909...
## $ Number_of_Casualties                        <int> 1, 1, 1, 1, 1, 1, 1, 2,...
## $ Number_of_Vehicles                          <int> 1, 1, 2, 1, 1, 2, 2, 1,...
## $ Pedestrian_Crossing.Human_Control           <int> 0, 0, 0, 0, 0, 0, 0, 0,...
## $ Pedestrian_Crossing.Physical_Facilities     <int> 1, 5, 0, 0, 0, 0, 0, 0,...
## $ Police_Force                                <chr> "Metropolitan Police", ...
## $ Road_Surface_Conditions                     <chr> "Wet or damp", "Dry", "...
## $ Road_Type                                   <chr> "Single carriageway", "...
## $ Special_Conditions_at_Site                  <chr> "None", "None", "None",...
## $ Speed_limit                                 <int> 30, 30, 30, 30, 30, 30,...
## $ Time                                        <chr> "17:42", "17:36", "00:1...
## $ Urban_or_Rural_Area                         <chr> "Urban", "Urban", "Urba...
## $ Weather_Conditions                          <chr> "Raining no high winds"...
## $ Year                                        <int> 2005, 2005, 2005, 2005,...
## $ InScotland                                  <chr> "No", "No", "No", "No",...

Data Coercion

Change to Factor/ Categorical

# Convert every categorical column to a factor in a single step.
# across() applies as.factor() to each listed column and leaves all other
# columns (and the column order) untouched.
traffic <- traffic %>%
  mutate(across(
    c(
      X1st_Road_Class,
      X1st_Road_Number,
      X2nd_Road_Class,
      X2nd_Road_Number,
      Accident_Severity,
      Carriageway_Hazards,
      Day_of_Week,
      Did_Police_Officer_Attend_Scene_of_Accident,
      Junction_Control,
      Junction_Detail,
      Light_Conditions,
      Local_Authority_.District.,
      Local_Authority_.Highway.,
      Location_Easting_OSGR,
      Location_Northing_OSGR,
      LSOA_of_Accident_Location,
      Police_Force,
      Road_Surface_Conditions,
      Road_Type,
      Special_Conditions_at_Site,
      Speed_limit,
      Urban_or_Rural_Area,
      Weather_Conditions,
      InScotland,
      Year
    ),
    as.factor
  ))

Change to Date

# Parse the "YYYY-MM-DD" strings into Date values.
traffic$Date <- lubridate::ymd(traffic$Date)
# Parse the "HH:MM" strings into lubridate Period values.
traffic$Time <- lubridate::hm(traffic$Time)

Check variable type again

# Display the internal structure (class and leading values) of each column.
utils::str(traffic)
## 'data.frame':    2047256 obs. of  34 variables:
##  $ Accident_Index                             : chr  "200501BS00001" "200501BS00002" "200501BS00003" "200501BS00004" ...
##  $ X1st_Road_Class                            : Factor w/ 6 levels "A","A(M)","B",..: 1 3 4 1 6 6 4 1 1 3 ...
##  $ X1st_Road_Number                           : Factor w/ 7160 levels "0","1","2","3",..: 2915 451 1 2917 1 1 1 316 2909 451 ...
##  $ X2nd_Road_Class                            : Factor w/ 6 levels "A","A(M)","B",..: NA 4 NA NA NA NA 6 NA 3 4 ...
##  $ X2nd_Road_Number                           : Factor w/ 7512 levels "0","1","2","3",..: 1 1 1 1 1 1 1 1 305 1 ...
##  $ Accident_Severity                          : Factor w/ 3 levels "Fatal","Serious",..: 2 3 3 3 3 3 3 3 3 3 ...
##  $ Carriageway_Hazards                        : Factor w/ 7 levels "Any animal in carriageway (except ridden horse)",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ Date                                       : Date, format: "2005-01-04" "2005-01-05" ...
##  $ Day_of_Week                                : Factor w/ 7 levels "Friday","Monday",..: 6 7 5 1 2 6 5 1 3 3 ...
##  $ Did_Police_Officer_Attend_Scene_of_Accident: Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Junction_Control                           : Factor w/ 6 levels "Authorised person",..: 3 2 3 3 3 3 4 3 2 4 ...
##  $ Junction_Detail                            : Factor w/ 10 levels "Crossroads","Data missing or out of range",..: 5 1 5 5 5 5 10 5 1 10 ...
##  $ Latitude                                   : num  51.5 51.5 51.5 51.5 51.5 ...
##  $ Light_Conditions                           : Factor w/ 6 levels "Darkness - lighting unknown",..: 6 2 2 6 1 6 2 6 2 6 ...
##  $ Local_Authority_.District.                 : Factor w/ 416 levels "Aberdeen City",..: 183 183 183 183 183 183 183 183 183 183 ...
##  $ Local_Authority_.Highway.                  : Factor w/ 207 levels "Aberdeen City",..: 90 90 90 90 90 90 90 90 90 90 ...
##  $ Location_Easting_OSGR                      : Factor w/ 279456 levels "64950","64980",..: 223024 221891 222168 224082 225049 222341 221931 223193 224470 222187 ...
##  $ Location_Northing_OSGR                     : Factor w/ 344815 levels "10290","10304",..: 82209 85082 85526 81630 82846 84676 84381 83428 81717 84362 ...
##  $ Longitude                                  : num  -0.191 -0.212 -0.206 -0.174 -0.157 ...
##  $ LSOA_of_Accident_Location                  : Factor w/ 35565 levels "","E01000001",..: 2848 2908 2856 2839 2862 2831 2874 2888 2899 2874 ...
##  $ Number_of_Casualties                       : int  1 1 1 1 1 1 1 2 2 5 ...
##  $ Number_of_Vehicles                         : int  1 1 2 1 1 2 2 1 2 2 ...
##  $ Pedestrian_Crossing.Human_Control          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Pedestrian_Crossing.Physical_Facilities    : int  1 5 0 0 0 0 0 0 5 8 ...
##  $ Police_Force                               : Factor w/ 51 levels "Avon and Somerset",..: 30 30 30 30 30 30 30 30 30 30 ...
##  $ Road_Surface_Conditions                    : Factor w/ 6 levels "Data missing or out of range",..: 6 2 2 2 6 6 2 2 2 2 ...
##  $ Road_Type                                  : Factor w/ 7 levels "Data missing or out of range",..: 5 2 5 5 5 5 5 2 5 5 ...
##  $ Special_Conditions_at_Site                 : Factor w/ 9 levels "Auto signal part defective",..: 5 5 5 5 5 6 5 5 5 5 ...
##  $ Speed_limit                                : Factor w/ 9 levels "0","10","15",..: 5 5 5 5 5 5 5 5 5 5 ...
##  $ Time                                       :Formal class 'Period' [package "lubridate"] with 6 slots
##   .. ..@ .Data : num  0 0 0 0 0 0 0 0 0 0 ...
##   .. ..@ year  : num  0 0 0 0 0 0 0 0 0 0 ...
##   .. ..@ month : num  0 0 0 0 0 0 0 0 0 0 ...
##   .. ..@ day   : num  0 0 0 0 0 0 0 0 0 0 ...
##   .. ..@ hour  : num  17 17 0 10 21 12 20 17 22 16 ...
##   .. ..@ minute: num  42 36 15 35 13 40 40 35 43 0 ...
##  $ Urban_or_Rural_Area                        : Factor w/ 3 levels "Rural","Unallocated",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ Weather_Conditions                         : Factor w/ 10 levels "Data missing or out of range",..: 7 3 3 3 3 7 3 3 3 3 ...
##  $ Year                                       : Factor w/ 13 levels "2005","2006",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ InScotland                                 : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
# Auto-print the data frame for a visual check.
traffic

Missing Value

Show the number of missing (NA) values in each column

# Count the NA entries in every column.
traffic %>%
  is.na() %>%
  colSums()
##                              Accident_Index 
##                                           0 
##                             X1st_Road_Class 
##                                           0 
##                            X1st_Road_Number 
##                                           2 
##                             X2nd_Road_Class 
##                                      844272 
##                            X2nd_Road_Number 
##                                       17593 
##                           Accident_Severity 
##                                           0 
##                         Carriageway_Hazards 
##                                           0 
##                                        Date 
##                                           0 
##                                 Day_of_Week 
##                                           0 
## Did_Police_Officer_Attend_Scene_of_Accident 
##                                         278 
##                            Junction_Control 
##                                           0 
##                             Junction_Detail 
##                                           0 
##                                    Latitude 
##                                         174 
##                            Light_Conditions 
##                                           0 
##                  Local_Authority_.District. 
##                                           0 
##                   Local_Authority_.Highway. 
##                                           0 
##                       Location_Easting_OSGR 
##                                         164 
##                      Location_Northing_OSGR 
##                                         164 
##                                   Longitude 
##                                         175 
##                   LSOA_of_Accident_Location 
##                                           0 
##                        Number_of_Casualties 
##                                           0 
##                          Number_of_Vehicles 
##                                           0 
##           Pedestrian_Crossing.Human_Control 
##                                        2920 
##     Pedestrian_Crossing.Physical_Facilities 
##                                        3560 
##                                Police_Force 
##                                           0 
##                     Road_Surface_Conditions 
##                                           0 
##                                   Road_Type 
##                                           0 
##                  Special_Conditions_at_Site 
##                                           0 
##                                 Speed_limit 
##                                          37 
##                                        Time 
##                                         156 
##                         Urban_or_Rural_Area 
##                                           0 
##                          Weather_Conditions 
##                                           0 
##                                        Year 
##                                           0 
##                                  InScotland 
##                                          53

Remove Columns That Are Unrelated to This Research

# Keep every column except the road class/number and OSGR grid-reference
# columns; setdiff() preserves the original order of the remaining names.
traffic <- traffic[
  ,
  setdiff(
    names(traffic),
    c(
      "X1st_Road_Class",
      "X1st_Road_Number",
      "X2nd_Road_Class",
      "X2nd_Road_Number",
      "Location_Easting_OSGR",
      "Location_Northing_OSGR"
    )
  ),
  drop = FALSE
]

Remove Rows That Have NA Values

# Drop the rows that have an NA in any of the listed columns.
# na.omit() on a data.frame has no `cols` argument (that belongs to
# data.table's method), so the original call silently ignored the list and
# removed rows with an NA in *any* column. complete.cases() on the selected
# columns applies the restriction that was intended.
traffic <- traffic[
  complete.cases(
    traffic[, c(
      "Did_Police_Officer_Attend_Scene_of_Accident",
      "Longitude",
      "Pedestrian_Crossing.Human_Control",
      "Pedestrian_Crossing.Physical_Facilities",
      "Speed_limit",
      "InScotland"
    )]
  ),
]

Check The Table Again

# Auto-print the data frame for a visual check.
traffic

Data Explanation

Brief Explanation

# Compact overview: one line per column with its type and first values.
dplyr::glimpse(traffic)
## Rows: 2,042,570
## Columns: 28
## $ Accident_Index                              <chr> "200501BS00001", "20050...
## $ Accident_Severity                           <fct> Serious, Slight, Slight...
## $ Carriageway_Hazards                         <fct> None, None, None, None,...
## $ Date                                        <date> 2005-01-04, 2005-01-05...
## $ Day_of_Week                                 <fct> Tuesday, Wednesday, Thu...
## $ Did_Police_Officer_Attend_Scene_of_Accident <fct> 1, 1, 1, 1, 1, 1, 1, 1,...
## $ Junction_Control                            <fct> Data missing or out of ...
## $ Junction_Detail                             <fct> Not at junction or with...
## $ Latitude                                    <dbl> 51.48910, 51.52007, 51....
## $ Light_Conditions                            <fct> Daylight, Darkness - li...
## $ Local_Authority_.District.                  <fct> Kensington and Chelsea,...
## $ Local_Authority_.Highway.                   <fct> Kensington and Chelsea,...
## $ Longitude                                   <dbl> -0.191170, -0.211708, -...
## $ LSOA_of_Accident_Location                   <fct> E01002849, E01002909, E...
## $ Number_of_Casualties                        <int> 1, 1, 1, 1, 1, 1, 1, 2,...
## $ Number_of_Vehicles                          <int> 1, 1, 2, 1, 1, 2, 2, 1,...
## $ Pedestrian_Crossing.Human_Control           <int> 0, 0, 0, 0, 0, 0, 0, 0,...
## $ Pedestrian_Crossing.Physical_Facilities     <int> 1, 5, 0, 0, 0, 0, 0, 0,...
## $ Police_Force                                <fct> Metropolitan Police, Me...
## $ Road_Surface_Conditions                     <fct> Wet or damp, Dry, Dry, ...
## $ Road_Type                                   <fct> Single carriageway, Dua...
## $ Special_Conditions_at_Site                  <fct> None, None, None, None,...
## $ Speed_limit                                 <fct> 30, 30, 30, 30, 30, 30,...
## $ Time                                        <Period> 17H 42M 0S, 17H 36M ...
## $ Urban_or_Rural_Area                         <fct> Urban, Urban, Urban, Ur...
## $ Weather_Conditions                          <fct> Raining no high winds, ...
## $ Year                                        <fct> 2005, 2005, 2005, 2005,...
## $ InScotland                                  <fct> No, No, No, No, No, No,...

Overview

In which periods do most accidents happen?

Year

# Stacked bar chart: number of accidents per year, split by severity.
# Changes from the previous version:
#   * geom_col() always uses stat = "identity"; passing `stat` only produced
#     an "Ignoring unknown parameters" warning, so it is removed.
#   * "Set3" has at most 12 colours; asking for 15 raised a warning and fell
#     back to 12, so 12 is requested directly (the colours are unchanged).
#   * `.groups = "drop"` returns an ungrouped summary and silences the
#     regrouping message.
traffic %>%
  group_by(Accident_Severity, Year) %>%
  dplyr::summarise(n_Year = n(), .groups = "drop") %>%
  arrange(desc(n_Year)) %>%
  ggplot(aes(
    x = Year,
    y = n_Year,
    fill = Accident_Severity
  )) +
  geom_col(position = "stack") +
  theme_bw() +
  scale_fill_manual(values = rev(brewer.pal(12, "Set3"))) +
  labs(
    title = "Traffic per Year X Accident Severity",
    subtitle = "(2005-17)",
    x = "Year",
    y = "Freq"
  )

Day of Week

# Stacked bar chart: number of accidents per weekday, split by severity.
# Days are ordered by reorder() on -n_Day, i.e. busiest day first.
# Changes from the previous version:
#   * geom_col() always uses stat = "identity"; passing `stat` only produced
#     an "Ignoring unknown parameters: stat" warning, so it is removed.
#   * `x =` now names the aesthetic instead of sitting inside reorder().
#   * `.groups = "drop"` returns an ungrouped summary and silences the
#     regrouping message.
traffic %>%
  group_by(Accident_Severity, Day_of_Week) %>%
  dplyr::summarise(n_Day = n(), .groups = "drop") %>%
  arrange(desc(n_Day)) %>%
  ggplot(aes(
    x = reorder(Day_of_Week, -n_Day),
    y = n_Day,
    fill = Accident_Severity
  )) +
  geom_col(position = "stack") +
  theme_bw() +
  scale_fill_manual(values = rev(brewer.pal(7, "Set3"))) +
  labs(
    title = "Traffic per Day X Accident Severity",
    subtitle = "(2005-17)",
    x = "Day of Week",
    y = "Freq"
  )
## `summarise()` regrouping output by 'Accident_Severity' (override with `.groups` argument)
## Warning: Ignoring unknown parameters: stat

Per Category: Create Data Frame

Junction Control

# Accident counts for every Accident_Severity x Junction_Control x Year
# combination, largest counts first. `.groups = "drop"` returns an ungrouped
# table, so no leftover grouping affects later verbs, and it silences
# summarise()'s regrouping message.
plot_Junction_Control <- traffic %>%
  group_by(Accident_Severity, Junction_Control, Year) %>%
  dplyr::summarise(n_Junction_Control = n(), .groups = "drop") %>%
  arrange(desc(n_Junction_Control))

Junction Detail

# Accident counts for every Accident_Severity x Junction_Detail x Year
# combination, largest counts first. `.groups = "drop"` returns an ungrouped
# table and silences summarise()'s regrouping message.
plot_Junction_Detail <- traffic %>%
  group_by(Accident_Severity, Junction_Detail, Year) %>%
  dplyr::summarise(n_Junction_Detail = n(), .groups = "drop") %>%
  arrange(desc(n_Junction_Detail))

Urban/Rural Area

# Accident counts for every Accident_Severity x Urban_or_Rural_Area x Year
# combination, largest counts first. `.groups = "drop"` returns an ungrouped
# table and silences summarise()'s regrouping message.
plot_Urban_or_Rural_Area <- traffic %>%
  group_by(Accident_Severity, Urban_or_Rural_Area, Year) %>%
  dplyr::summarise(n_Urban_or_Rural_Area = n(), .groups = "drop") %>%
  arrange(desc(n_Urban_or_Rural_Area))

Road Type

# Accident counts for every Accident_Severity x Road_Type x Year
# combination, largest counts first. `.groups = "drop"` returns an ungrouped
# table and silences summarise()'s regrouping message.
plot_Road_Type <- traffic %>%
  group_by(Accident_Severity, Road_Type, Year) %>%
  dplyr::summarise(n_Road_Type = n(), .groups = "drop") %>%
  arrange(desc(n_Road_Type))

Road Surface Condition

# Accident counts for every Accident_Severity x Road_Surface_Conditions x
# Year combination, largest counts first. `.groups = "drop"` returns an
# ungrouped table and silences summarise()'s regrouping message.
plot_Road_Surface_Conditions <- traffic %>%
  group_by(Accident_Severity, Road_Surface_Conditions, Year) %>%
  dplyr::summarise(n_Road_Surface_Conditions = n(), .groups = "drop") %>%
  arrange(desc(n_Road_Surface_Conditions))

Speed Limit

# Accident counts for every Accident_Severity x Speed_limit x Year
# combination, largest counts first. `.groups = "drop"` returns an ungrouped
# table and silences summarise()'s regrouping message.
plot_Speed_limit <- traffic %>%
  group_by(Accident_Severity, Speed_limit, Year) %>%
  dplyr::summarise(n_Speed_limit = n(), .groups = "drop") %>%
  arrange(desc(n_Speed_limit))

Weather Condition

# Accident counts for every Accident_Severity x Weather_Conditions x Year
# combination, largest counts first. `.groups = "drop"` returns an ungrouped
# table and silences summarise()'s regrouping message.
plot_Weather_Conditions <- traffic %>%
  group_by(Accident_Severity, Weather_Conditions, Year) %>%
  dplyr::summarise(n_Weather_Conditions = n(), .groups = "drop") %>%
  arrange(desc(n_Weather_Conditions))

Light Condition

# Accident counts for every Accident_Severity x Light_Conditions x Year
# combination, largest counts first. `.groups = "drop"` returns an ungrouped
# table and silences summarise()'s regrouping message.
plot_Light_Conditions <- traffic %>%
  group_by(Accident_Severity, Light_Conditions, Year) %>%
  dplyr::summarise(n_Light_Conditions = n(), .groups = "drop") %>%
  arrange(desc(n_Light_Conditions))

Per Category: Which Variable Has The Most Impact?

Junction Control

# Proportional (position = "fill") bars: within each severity level, the
# share of accidents per junction-control type. Rows labelled
# "Data missing or out of range" are excluded before plotting.
ggplot(
  data = filter(
    plot_Junction_Control,
    Junction_Control != "Data missing or out of range"
  ),
  mapping = aes(
    x = Accident_Severity,
    y = n_Junction_Control,
    fill = Junction_Control
  )
) +
  geom_col(position = "fill") +
  scale_fill_manual(values = rev(brewer.pal(5, "Set3"))) +
  labs(
    title = "Accident Severity X Junction Control",
    subtitle = "(2005-17)",
    x = "Accident Severity",
    y = "Percentage"
  ) +
  theme_bw()

Junction Detail

# Proportional (position = "fill") bars: within each severity level, the
# share of accidents per junction-detail category.
ggplot(
  data = plot_Junction_Detail,
  mapping = aes(
    x = Accident_Severity,
    y = n_Junction_Detail,
    fill = Junction_Detail
  )
) +
  geom_col(position = "fill") +
  scale_fill_manual(values = rev(brewer.pal(10, "Set3"))) +
  labs(
    title = "Accident Severity X Junction Detail",
    subtitle = "(2005-17)",
    x = "Accident Severity",
    y = "Percentage"
  ) +
  theme_bw()

Urban/Rural Area

# Proportional (position = "fill") bars: within each severity level, the
# share of accidents per urban/rural category.
ggplot(
  data = plot_Urban_or_Rural_Area,
  mapping = aes(
    x = Accident_Severity,
    y = n_Urban_or_Rural_Area,
    fill = Urban_or_Rural_Area
  )
) +
  geom_col(position = "fill") +
  scale_fill_manual(values = rev(brewer.pal(10, "Set3"))) +
  labs(
    title = "Accident Severity X Urban/Rural Area",
    subtitle = "(2005-17)",
    x = "Accident Severity",
    y = "Percentage"
  ) +
  theme_bw()

Road Type

# Proportional (position = "fill") bars: within each severity level, the
# share of accidents per road type.
ggplot(
  data = plot_Road_Type,
  mapping = aes(
    x = Accident_Severity,
    y = n_Road_Type,
    fill = Road_Type
  )
) +
  geom_col(position = "fill") +
  scale_fill_manual(values = rev(brewer.pal(10, "Set3"))) +
  labs(
    title = "Accident Severity X Road Type",
    subtitle = "(2005-17)",
    x = "Accident Severity",
    y = "Percentage"
  ) +
  theme_bw()

Road Surface Condition

# Proportional (position = "fill") bars: within each severity level, the
# share of accidents per road-surface condition.
# The title previously read "Road Survey Condition"; the plotted variable is
# Road_Surface_Conditions, so the label is corrected to "Surface".
plot_Road_Surface_Conditions %>%
  ggplot(aes(
    x = Accident_Severity,
    y = n_Road_Surface_Conditions,
    fill = Road_Surface_Conditions
  )) +
  geom_col(position = "fill") +
  theme_bw() +
  labs(
    title = "Accident Severity X Road Surface Condition",
    subtitle = "(2005-17)",
    x = "Accident Severity",
    y = "Percentage"
  ) +
  scale_fill_manual(values = rev(brewer.pal(10, "Set3")))

Speed Limit

# Proportional (position = "fill") bars: within each severity level, the
# share of accidents per speed limit.
ggplot(
  data = plot_Speed_limit,
  mapping = aes(
    x = Accident_Severity,
    y = n_Speed_limit,
    fill = Speed_limit
  )
) +
  geom_col(position = "fill") +
  scale_fill_manual(values = rev(brewer.pal(10, "Set3"))) +
  labs(
    title = "Accident Severity X Speed Limit",
    subtitle = "(2005-17)",
    x = "Accident Severity",
    y = "Percentage"
  ) +
  theme_bw()

Weather Condition

# Proportional (position = "fill") bars: within each severity level, the
# share of accidents per weather condition.
# "Set3" has at most 12 colours; asking for 15 raised a warning and fell back
# to the 12-colour palette, so 12 is requested directly (same colours).
plot_Weather_Conditions %>%
  ggplot(aes(
    x = Accident_Severity,
    y = n_Weather_Conditions,
    fill = Weather_Conditions
  )) +
  geom_col(position = "fill") +
  theme_bw() +
  labs(
    title = "Accident Severity X Weather Condition",
    subtitle = "(2005-17)",
    x = "Accident Severity",
    y = "Percentage"
  ) +
  scale_fill_manual(values = rev(brewer.pal(12, "Set3")))

Light Condition

# Proportional (position = "fill") bars: within each severity level, the
# share of accidents per light condition.
# "Set3" has at most 12 colours; asking for 15 raised a warning and fell back
# to the 12-colour palette, so 12 is requested directly (same colours).
plot_Light_Conditions %>%
  ggplot(aes(
    x = Accident_Severity,
    y = n_Light_Conditions,
    fill = Light_Conditions
  )) +
  geom_col(position = "fill") +
  theme_bw() +
  labs(
    title = "Accident Severity X Light Condition",
    subtitle = "(2005-17)",
    x = "Accident Severity",
    y = "Percentage"
  ) +
  scale_fill_manual(values = rev(brewer.pal(12, "Set3")))

5. Data Insight

CASE 1

TOP 5 LOCAL AUTHORITIES & POLICE FORCES THAT HANDLE FATAL ACCIDENTS

Trying to use leaflet

# Count accidents per severity at every distinct coordinate pair, most
# frequent locations first. `.groups = "drop"` returns an ungrouped table.
n_Year <- traffic %>%
  group_by(Accident_Severity, Longitude, Latitude) %>%
  dplyr::summarise(n_traffic = n(), .groups = "drop") %>%
  arrange(desc(n_traffic))

# Interactive map of the fatal-accident locations.
# The previous version piped a leaflet widget into ggplot(), which fails
# because ggplot() needs a data frame. It also mapped `n_traffic` from
# `traffic` (the column only exists in `n_Year`) and called geom_text_repel()
# and coord_map(), whose packages are not loaded in this file. The map is now
# built with leaflet layers only.
n_Year %>%
  filter(
    Accident_Severity == "Fatal",
    # Markers cannot be placed without both coordinates.
    !is.na(Longitude),
    !is.na(Latitude)
  ) %>%
  leaflet() %>%
  addProviderTiles("Esri.WorldGrayCanvas") %>%
  setView(lng = -1.9, lat = 55, zoom = 5) %>%
  addCircleMarkers(
    lng = ~Longitude,
    lat = ~Latitude,
    # Larger circles where several fatal accidents share one location.
    radius = ~ 3 + n_traffic,
    color = "red",
    stroke = FALSE,
    fillOpacity = 0.5,
    label = ~ paste0("Fatal accidents: ", n_traffic),
    # Cluster nearby markers so the widget stays responsive.
    clusterOptions = markerClusterOptions()
  )
  
  • Answer to the question
# Rank (district, police force) pairs by their number of fatal accidents
# and show the five largest.
filter(traffic, Accident_Severity == "Fatal") %>%
  group_by(Local_Authority_.District., Police_Force) %>%
  dplyr::count(sort = TRUE) %>%
  head(n = 5)

The City of Birmingham, under the West Midlands Police Force, handled the most fatal accidents between 2005 and 2017.

CASE 2

IS SCOTLAND MORE DANGEROUS?

# Number of fatal accidents inside versus outside Scotland, largest first.
filter(traffic, Accident_Severity == "Fatal") %>%
  group_by(InScotland) %>%
  dplyr::count(sort = TRUE) %>%
  head(n = 5)

The number of fatal accidents in Scotland is almost one-tenth of the number outside Scotland.